Data loading and processing

library(ggplot2)

metadata = read.csv("data/MGRB_paper_sample_metadata_20180713.csv", stringsAsFactors = FALSE)

metadata$cohort[metadata$cohort == "SweGen_NSPHS"] = "SweGen"
metadata$cohort[metadata$cohort == "SweGen_STR"] = "SweGen"

metadata = metadata[
    (metadata$cohort %in% c("ASPREE", "45andUp") & 
        metadata$Phase2.SampleQCTier %in% c(1, 2) & 
        metadata$successfullySequenced == 1 &
        metadata$Phase2.RelatednessDropped == 0) |
    (metadata$cohort %in% c("Cairns")) & 
    !apply(is.na(metadata[,c("isFemale", "AgeAtCollectionYears")]), 1, any),]

metadata$Frailty_GaitSpeedMs = 3/metadata$Frailty_GaitSpeed

metadata$sex = factor(c("male", "female")[metadata$isFemale + 1])

Exploratory analysis

Sex check

ggplot(metadata, aes(x = xCNLowDepth, y = yCNLowDepth, col = sex, pch = cohort)) + geom_point()

ggplot(metadata, aes(x = xCNLowDepth, y = yCNLowDepth, col = sex)) + geom_point() + facet_wrap(~ cohort)

One probable XXX. There are a number of possible XYs present also, but they have been removed by the successfullySequenced == 1 and !is.na(metadata$isFemale) filters. Some ASRB have lower X CN than the other cohorts, and ASRB in general has higher Y CN than the others; probable technical bias. This won’t affect the subsequent frailty comparisons as these are internal to ASPREE only.

Somatic metrics and frailty with age

ggplot(metadata, aes(x = AgeAtCollectionYears, y = TelSeqFinalRecalibrated, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = SomaSNV_S3_LD, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 178 rows containing non-finite values (stat_smooth).
## Warning: Removed 178 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = mtCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()

ggplot(metadata, aes(x = AgeAtCollectionYears, y = xCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()

ggplot(metadata, aes(x = AgeAtCollectionYears, y = yCNLowDepth, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()

ggplot(metadata, aes(x = AgeAtCollectionYears, y = mity_LowDepth_NumVars, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Removed 204 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = SomaCNV_ClonalFraction, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).
## Warning: Removed 1177 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = CH_any_10pct, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "gam", formula = y ~ x, method.args = list(family = "binomial")) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).

## Warning: Removed 1177 rows containing missing values (geom_point).

ggplot(metadata[metadata$CH_any_10pct == 1,], aes(x = AgeAtCollectionYears, y = SomaCNV_ClonalFraction, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", se = FALSE) + theme_bw()
## Warning: Removed 1177 rows containing non-finite values (stat_smooth).

## Warning: Removed 1177 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GripStrength, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess") + theme_bw()
## Warning: Removed 1494 rows containing non-finite values (stat_smooth).
## Warning: Removed 1494 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GaitSpeedMs, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess") + theme_bw()
## Warning: Removed 1472 rows containing non-finite values (stat_smooth).
## Warning: Removed 1472 rows containing missing values (geom_point).

Correlations between measures

library(corrplot)
## corrplot 0.84 loaded
corrplot(cor(metadata[metadata$sex == "male", c("TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "yCNLowDepth", "mity_LowDepth_NumVars", "Frailty_GripStrength", "Frailty_GaitSpeedMs")], method = "kendall", use = "pairwise.complete.obs"), method = "square", main = "Males")

corrplot(cor(metadata[metadata$sex == "female", c("TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "mity_LowDepth_NumVars", "Frailty_GripStrength", "Frailty_GaitSpeedMs")], method = "kendall", use = "pairwise.complete.obs"), method = "square", main = "Females")

Some correlation, but no strong colinearity.

Flexible model fits for plots

Use some simple GAM fits to aid in plotting, factoring out the cohort intercepts. We add a negligible amount of noise (+/- 6 months) to the age variable to prevent false positives with gam.check.

library(mgcv)

set.seed(314159)
metadata$AgeAtCollectionYears.jittered = metadata$AgeAtCollectionYears - 0.5 + runif(nrow(metadata))

plotfit.telseq = gam(TelSeqFinalRecalibrated ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.telseq)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 5 iterations.
## The RMS GCV score gradient at convergence was 1.013275e-07 .
## The Hessian was positive definite.
## Model rank =  21 / 21 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                             k' edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.0 2.8    1.01    0.82
## s(AgeAtCollectionYears.jittered):sexmale   9.0 3.3    1.01    0.70
plotfit.soma_snv = gam(log(SomaSNV_S3_LD + 0.2) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.soma_snv)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 5 iterations.
## The RMS GCV score gradient at convergence was 8.627284e-07 .
## The Hessian was positive definite.
## Model rank =  21 / 21 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                              k'  edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 2.46    1.01    0.64
## s(AgeAtCollectionYears.jittered):sexmale   9.00 3.29    1.01    0.69
plotfit.mtcn = gam(log(mtCNLowDepth) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.mtcn)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 9 iterations.
## The RMS GCV score gradient at convergence was 1.880056e-06 .
## The Hessian was positive definite.
## Model rank =  21 / 21 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                              k'  edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 8.49       1    0.34
## s(AgeAtCollectionYears.jittered):sexmale   9.00 4.55       1    0.42
plotfit.ycn = gam(yCNLowDepth ~ s(AgeAtCollectionYears.jittered) + cohort, data = metadata[metadata$sex == "male",])
gam.check(plotfit.ycn)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 4 iterations.
## The RMS GCV score gradient at convergence was 6.975827e-08 .
## The Hessian was positive definite.
## Model rank =  12 / 12 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                    k'  edf k-index p-value
## s(AgeAtCollectionYears.jittered) 9.00 3.77    1.01    0.69
# Odd residual distribution -- not linked to cohort

plotfit.mitynvar = gam(log(mity_LowDepth_NumVars+1) ~ s(AgeAtCollectionYears.jittered, by = sex) + cohort, data = metadata)
gam.check(plotfit.mitynvar)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 9 iterations.
## The RMS GCV score gradient at convergence was 1.159208e-06 .
## The Hessian was positive definite.
## Model rank =  21 / 21 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                              k'  edf k-index p-value
## s(AgeAtCollectionYears.jittered):sexfemale 9.00 7.36       1    0.56
## s(AgeAtCollectionYears.jittered):sexmale   9.00 5.69       1    0.65

Now construct the cohort intercept-corrected data for plotting, using the ASPREE intercepts as a reference:

temp = metadata
temp$cohort = "ASPREE"

metadata$plotfit.telseq = NA
metadata$plotfit.soma_snv = NA
metadata$plotfit.mtcn = NA
metadata$plotfit.ycn = NA
metadata$plotfit.mitynvar = NA

metadata$plotfit.telseq[!is.na(metadata$TelSeqFinalRecalibrated)] = predict(plotfit.telseq, newdata = temp[!is.na(metadata$TelSeqFinalRecalibrated),]) + residuals(plotfit.telseq)
metadata$plotfit.soma_snv[!is.na(metadata$SomaSNV_S3_LD)] = exp(predict(plotfit.soma_snv, newdata = temp[!is.na(metadata$SomaSNV_S3_LD),]) + residuals(plotfit.soma_snv) - 0.2)
metadata$plotfit.mtcn[!is.na(metadata$mtCNLowDepth)] = exp(predict(plotfit.mtcn, newdata = temp[!is.na(metadata$mtCNLowDepth),]) + residuals(plotfit.mtcn))
metadata$plotfit.ycn[!is.na(metadata$yCNLowDepth) & metadata$sex == "male"] = predict(plotfit.ycn, newdata = temp[!is.na(metadata$yCNLowDepth) & metadata$sex == "male",]) + residuals(plotfit.ycn)
metadata$plotfit.mitynvar[!is.na(metadata$mity_LowDepth_NumVars)] = exp(predict(plotfit.mitynvar, newdata = temp[!is.na(metadata$mity_LowDepth_NumVars),]) + residuals(plotfit.mitynvar) - 1)

Final cohort intercept-corrected plots. Note clipping in some cases to focus on functional form; only a minor fraction of points are lost.

ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.telseq, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(1, 5))
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.soma_snv, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + scale_y_continuous(trans = 'log10') + coord_cartesian(ylim = c(0.1, 30))
## Warning: Removed 178 rows containing non-finite values (stat_smooth).
## Warning: Removed 178 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.mtcn, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 400))

ggplot(metadata[metadata$sex == "male",], aes(x = AgeAtCollectionYears, y = plotfit.ycn, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 1))

ggplot(metadata, aes(x = AgeAtCollectionYears, y = plotfit.mitynvar, col = cohort)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw() + coord_cartesian(ylim = c(0, 10))
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Removed 204 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GripStrength, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw()
## Warning: Removed 1494 rows containing non-finite values (stat_smooth).
## Warning: Removed 1494 rows containing missing values (geom_point).

ggplot(metadata, aes(x = AgeAtCollectionYears, y = Frailty_GaitSpeedMs, col = cohort, lty = sex)) + geom_point(alpha = 0.1) + geom_smooth(method = "loess", level = 0.99) + theme_bw()
## Warning: Removed 1472 rows containing non-finite values (stat_smooth).
## Warning: Removed 1472 rows containing missing values (geom_point).

Model fitting

Directly test the hypothesis: somatic measures are significantly associated with fraility measures after conditioning for conventional variables. Use flexible GAM models; this necessitates permutation testing. Because of the number of tests (5 somatic variables in males, 4 in females, 2 frailty measure = 18 tests), use a training/validation split to alleviate MT concerns.

Preparations

library(plyr)
library(doParallel)
registerDoParallel(28)

test.pval_threshold = 0.2

gam_permutation_test = function(formula, data, perm_vars, seed = 314159, B = 10000, ...)
{
    fit_deviance = deviance(gam(formula = formula, data = data, ...))
    perm_deviance = laply(1:B, function(i)
    {
        perm_data = data
        set.seed(seed + i)
        permutation = sample.int(nrow(perm_data), replace = FALSE)
        for (perm_var in perm_vars)
            perm_data[,perm_var] = perm_data[permutation,perm_var]
        deviance(gam(formula = formula, data = perm_data, ...))
    }, .parallel = TRUE)

    nextr = sum(perm_deviance <= fit_deviance)

    list(deviance = fit_deviance, deviance_perm = perm_deviance, p.value = (nextr+0.5)/(B+1))
}

metadata.aspree.complete = metadata[metadata$cohort == "ASPREE", c("sampleID", "Frailty_GripStrength", "Frailty_GaitSpeedMs", 
    "AgeAtCollectionYears", "TelSeqFinalRecalibrated", "SomaSNV_S3_LD", "mtCNLowDepth", "yCNLowDepth", "mity_LowDepth_NumVars", 
    "sex", "HtMtrs", "WtKgs", "AbdoCircCms", "GlcmmolL")]
metadata.aspree.complete = metadata.aspree.complete[!apply(is.na(metadata.aspree.complete), 1, any),]
metadata.aspree.complete$BMI = metadata.aspree.complete$WtKgs / metadata.aspree.complete$HtMtrs^2
set.seed(314159)
metadata.aspree.complete$split = sample(rep(c("training", "validation"), c(round(0.25*nrow(metadata.aspree.complete)), nrow(metadata.aspree.complete) - round(0.25*nrow(metadata.aspree.complete)))))

Transformations

set.seed(314159)
metadata.aspree.complete$AgeAtCollectionYears.jittered = metadata.aspree.complete$AgeAtCollectionYears + runif(nrow(metadata.aspree.complete)) - 0.5
metadata.aspree.complete$Frailty_GripStrength.trans = metadata.aspree.complete$Frailty_GripStrength^0.7
metadata.aspree.complete$SomaSNV_S3_LD.trans = log(metadata.aspree.complete$SomaSNV_S3_LD + 1e-1)
metadata.aspree.complete$mtCNLowDepth.trans = sqrt(metadata.aspree.complete$mtCNLowDepth)

Tests

Training: Grip strength

test.grip.telseq.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.grip.somas3.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.grip.ycn.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(yCNLowDepth), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("yCNLowDepth"))
test.grip.mtcn.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.grip.mitynvars.male = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))

test.grip.telseq.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.grip.somas3.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.grip.mtcn.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.grip.mitynvars.female = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))

Training: Gait speed

test.gait.telseq.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.gait.somas3.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.gait.ycn.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(yCNLowDepth), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("yCNLowDepth"))
test.gait.mtcn.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.gait.mitynvars.male = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))

test.gait.telseq.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(TelSeqFinalRecalibrated), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("TelSeqFinalRecalibrated"))
test.gait.somas3.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(SomaSNV_S3_LD.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("SomaSNV_S3_LD.trans"))
test.gait.mtcn.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mtCNLowDepth.trans"))
test.gait.mitynvars.female = gam_permutation_test(formula = Frailty_GaitSpeedMs ~ s(AgeAtCollectionYears) + s(HtMtrs) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mity_LowDepth_NumVars), data = metadata.aspree.complete[metadata.aspree.complete$sex == "female" & metadata.aspree.complete$split == "training",], perm_vars = c("mity_LowDepth_NumVars"))

Training: Summary

test.pvals = c(
    "grip.telseq.male" = test.grip.telseq.male$p.value,
    "grip.somas3.male" = test.grip.somas3.male$p.value,
    "grip.ycn.male" = test.grip.ycn.male$p.value,
    "grip.mtcn.male" = test.grip.mtcn.male$p.value,
    "grip.mityn.male" = test.grip.mitynvars.male$p.value,

    "grip.telseq.female" = test.grip.telseq.female$p.value,
    "grip.somas3.female" = test.grip.somas3.female$p.value,
    "grip.mtcn.female" = test.grip.mtcn.female$p.value,
    "grip.mityn.female" = test.grip.mitynvars.female$p.value,

    "gait.telseq.male" = test.gait.telseq.male$p.value,
    "gait.somas3.male" = test.gait.somas3.male$p.value,
    "gait.ycn.male" = test.gait.ycn.male$p.value,
    "gait.mtcn.male" = test.gait.mtcn.male$p.value,
    "gait.mityn.male" = test.gait.mitynvars.male$p.value,

    "gait.telseq.female" = test.gait.telseq.female$p.value,
    "gait.somas3.female" = test.gait.somas3.female$p.value,
    "gait.mtcn.female" = test.gait.mtcn.female$p.value,
    "gait.mityn.female" = test.gait.mitynvars.female$p.value
)
sort(test.pvals)
##     grip.mtcn.male   grip.telseq.male   gait.mtcn.female 
##         0.05114489         0.29272073         0.34691531 
##  grip.mityn.female    grip.mityn.male      gait.ycn.male 
##         0.45720428         0.47580242         0.50059994 
##   grip.somas3.male     gait.mtcn.male gait.telseq.female 
##         0.50499950         0.54399560         0.55599440 
##  gait.mityn.female   gait.somas3.male      grip.ycn.male 
##         0.60348965         0.60478952         0.72277772 
## grip.somas3.female   grip.mtcn.female gait.somas3.female 
##         0.73007699         0.73197680         0.76697330 
## grip.telseq.female    gait.mityn.male   gait.telseq.male 
##         0.90785921         0.94065593         0.95195480
which(test.pvals <= test.pval_threshold)
## grip.mtcn.male 
##              4

Validation

test.grip.mtcn.male.validation = gam_permutation_test(formula = Frailty_GripStrength.trans ~ s(AgeAtCollectionYears) + s(WtKgs) + s(BMI) + s(AbdoCircCms) + s(mtCNLowDepth.trans), data = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & metadata.aspree.complete$split == "validation",], perm_vars = c("mtCNLowDepth.trans"))

test.grip.mtcn.male.validation$p.value
## [1] 0.03564644

Illustration: effect of mtDNA CN on grip strength in males

Examine the influence of mtDNA CN on grip strength in men. Express this as biological years vs calendar years for an 80 calendar-year-old man (the median age of men in ASPREE).

Parameters

illustr.target_somatic_quant = unique(sort(c(0.499, 0.501, seq(0.01, 0.99, 0.01))))
illustr.ci_levels = c(0.5, 0.8, 0.9, 0.95)
illustr.R = 100000
illustr.target_age = 80

Local quantile calculation

# Calculate the local quantile of each point in x.  The local quantile
# of point i is defined as eCDF(x_i), where the eCDF is based on the
# x-values of all points within deltaz of z_i, not including point i
# itself: { x_j : z_i - deltaz <= z_j < z_i + deltaz, j != i }.
local_quantile = function(x, z, deltaz, group = rep(1, length(x)))
{
    n = length(x)
    as.data.frame(t(sapply(1:n, function(i) {
        x_i = x[i]
        z_i = z[i]
        group_i = group[i]
        neighbourhood_x = x[z >= z_i - deltaz & z < z_i + deltaz & 1:n != i & group == group_i]
        neighbourhood_size = length(neighbourhood_x)
        if (neighbourhood_size == 0)
            lq = 0
        else
            lq = ecdf(neighbourhood_x)(x_i)
        return(c(lq = lq, ls = neighbourhood_size))
    })))
}

for (temp.var in c("Frailty_GripStrength", "Frailty_GaitSpeedMs", "SomaSNV_S3_LD", "mtCNLowDepth", "mity_LowDepth_NumVars", "yCNLowDepth", "TelSeqFinalRecalibrated"))
{
    metadata.aspree.complete = cbind(metadata.aspree.complete, local_quantile(metadata.aspree.complete[,temp.var], metadata.aspree.complete$AgeAtCollectionYears, 1.1, metadata.aspree.complete$sex))
    metadata.aspree.complete[metadata.aspree.complete[,ncol(metadata.aspree.complete)] < 20, ncol(metadata.aspree.complete)-1] = NA
    colnames(metadata.aspree.complete)[ncol(metadata.aspree.complete)-1] = sprintf("%s.quant.lq", temp.var)
    colnames(metadata.aspree.complete)[ncol(metadata.aspree.complete)] = sprintf("%s.quant.ls", temp.var)
}

For the target age of 80, the local quantile is based on 293 data points, being ASPREE men with age in [79, 81].

illustr.metadata.aspree.complete.male = metadata.aspree.complete[metadata.aspree.complete$sex == "male" & !is.na(metadata.aspree.complete$mtCNLowDepth.quant.lq),]
range(illustr.metadata.aspree.complete.male$AgeAtCollectionYears)
## [1] 75 89
median(illustr.metadata.aspree.complete.male$AgeAtCollectionYears)
## [1] 80
sum(illustr.metadata.aspree.complete.male$AgeAtCollectionYears == median(illustr.metadata.aspree.complete.male$AgeAtCollectionYears))
## [1] 102
ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GaitSpeedMs)) + geom_point() + geom_smooth(method = "gam")

ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GripStrength.trans)) + geom_point() + geom_smooth(method = "gam")

ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GaitSpeedMs)) + geom_point() + geom_smooth(method = "gam") + coord_cartesian(ylim = c(.8, 1.2))

ggplot(illustr.metadata.aspree.complete.male, aes(x = AgeAtCollectionYears.jittered, y = Frailty_GripStrength.trans)) + geom_point() + geom_smooth(method = "gam") + coord_cartesian(ylim = c(8, 14))

It’s reasonable to model somatic ~ age as linear, thereby considerably simplifying the inversion performed below. I consider this fine for illustrative purposes, which is the goal here. At worst, the slight double-dipping will falsely bias the bootstrap variance downwards by a small amount.

Bootstrapping

library(boot)

somatic_effect_bootstrap_statistic = function(data, indices, target_age, target_somatic_quant)
{
    data = data[indices,]

    fit = gam(frailty ~ age + s(somatic_quant), data = data)

    preds_frailty_age_mediansom = expand.grid(age = seq(min(data$age), max(data$age), 1), somatic_quant = 0.5)
    preds_frailty_targetage_allsom = expand.grid(age = target_age, somatic_quant = target_somatic_quant)

    preds_frailty_age_mediansom$frailty_somatic.pred = predict(fit, newdata = preds_frailty_age_mediansom, se.fit = FALSE)
    preds_frailty_targetage_allsom$frailty_somatic.pred = predict(fit, newdata = preds_frailty_targetage_allsom, se.fit = FALSE)

    frailty2age_mediansomatic = approxfun(x = preds_frailty_age_mediansom$frailty_somatic.pred, y = preds_frailty_age_mediansom$age)
    preds_frailty_targetage_allsom$frailtyage_somatic.pred = frailty2age_mediansomatic(preds_frailty_targetage_allsom$frailty_somatic.pred)

    preds_frailty_targetage_allsom$age_excess = preds_frailty_targetage_allsom$frailtyage_somatic.pred - preds_frailty_targetage_allsom$age
    preds_frailty_targetage_allsom$age_excess
}

somatic_effect_bootstrap = function(age, frailty, somatic_quant, target_age, target_somatic_quant, seed = NA, ...)
{
    if (!is.na(seed))
        set.seed(seed)

    boot(
        data = data.frame(age = age, frailty = frailty, somatic_quant = somatic_quant),
        statistic = function(data, indices) somatic_effect_bootstrap_statistic(data, indices, target_age, target_somatic_quant), 
        ...)
}

somatic_effect_bootsummary = function(boot_result, mtcn_quantile, ci_levels)
{
    bootsummary = expand.grid(mtcn_quantile = mtcn_quantile, ci_level = ci_levels)
    bootsummary = ddply(bootsummary, colnames(bootsummary), function(d) {
        ci = try(boot.ci(boot_result, conf = d$ci_level, index = which(mtcn_quantile == d$mtcn_quantile), type = "bca"))
        d$boot.method = "BCa"
        if (class(ci) == "try-error")
        {
            d$boot.lcl = NA
            d$boot.ucl = NA
        }
        else
        {
            d$boot.lcl = ci$bca[4]
            d$boot.ucl = ci$bca[5]
        }
        d}, .parallel = TRUE)
    bootsummary$statistic = boot_result$t0[match(bootsummary$mtcn_quantile, mtcn_quantile)]
    bootsummary
}

mtDNA effect on grip strength in males

illustr.grip_mtcn_bootstrap = somatic_effect_bootstrap(
    age = illustr.metadata.aspree.complete.male$AgeAtCollectionYears,
    frailty = illustr.metadata.aspree.complete.male$Frailty_GripStrength.trans,
    somatic_quant = illustr.metadata.aspree.complete.male$mtCNLowDepth.quant.lq,
    target_age = illustr.target_age,
    target_somatic_quant = illustr.target_somatic_quant,
    seed = 314159,
    R = illustr.R)

illustr.grip_mtcn_bootstrap.summary = somatic_effect_bootsummary(illustr.grip_mtcn_bootstrap, mtcn_quantile = illustr.target_somatic_quant, ci_levels = illustr.ci_levels)

ggplot(illustr.grip_mtcn_bootstrap.summary, aes(x = mtcn_quantile, y = statistic, ymin = boot.lcl, ymax = boot.ucl, fill = ordered(ci_level, levels = rev(sort(illustr.ci_levels))))) + 
    geom_ribbon(alpha = 0.8) +
    geom_line(lwd = 1) + 
    xlab("Mitochondrial depth quantile") + 
    ylab("Grip strength anomaly (years)") + 
    ggtitle("Effect on mitochondrial depth on grip strength in men") + 
    theme_bw() +
    scale_fill_brewer() + 
    guides(fill = guide_legend(title = "CL")) +
    geom_rug(data = illustr.metadata.aspree.complete.male, mapping = aes(x = mtCNLowDepth.quant.lq), sides = "b", alpha = 0.2, inherit.aes = FALSE)

# An alterative take, as the bootstrap CIs arguably aren't entirely sensible -- we're
# not proposing a formal test here after all, but rather just a visualisation.  In that
# context a HPD interval, which effectively is just a neater way to overplot the bootstrap
# runs, makes more sense.
library(HDInterval)

illustr.hpd_levels = c(0.5, 0.8, 0.9)

illustr.grip_mtcn_bootstrap.summary_hdi = ldply(illustr.hpd_levels, function(credmass) {
    hdis = apply(illustr.grip_mtcn_bootstrap$t, 2, hdi, credMass = credmass)
    data.frame(
        mtcn_quantile = illustr.target_somatic_quant,
        credmass = credmass,
        hdi.lcl = hdis[1,],
        hdi.ucl = hdis[2,],
        statistic = illustr.grip_mtcn_bootstrap$t0)
})

ggplot(illustr.grip_mtcn_bootstrap.summary_hdi, aes(x = mtcn_quantile, y = statistic, ymin = hdi.lcl, ymax = hdi.ucl, fill = ordered(credmass, levels = rev(sort(illustr.hpd_levels))))) + 
    geom_ribbon(alpha = 0.8) +
    geom_line(lwd = 1) + 
    xlab("Mitochondrial depth quantile") + 
    ylab("Grip strength anomaly (years)") + 
    ggtitle("Effect on mitochondrial depth on grip strength in men") + 
    theme_bw() +
    scale_fill_brewer() + 
    guides(fill = guide_legend(title = "Credible mass")) +
    geom_rug(data = illustr.metadata.aspree.complete.male, mapping = aes(x = mtCNLowDepth.quant.lq), sides = "b", alpha = 0.2, inherit.aes = FALSE)